{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第5章 合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>School</th>\n",
       "      <th>Class</th>\n",
       "      <th>ID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Address</th>\n",
       "      <th>Height</th>\n",
       "      <th>Weight</th>\n",
       "      <th>Math</th>\n",
       "      <th>Physics</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1101</td>\n",
       "      <td>M</td>\n",
       "      <td>street_1</td>\n",
       "      <td>173</td>\n",
       "      <td>63</td>\n",
       "      <td>34.0</td>\n",
       "      <td>A+</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1102</td>\n",
       "      <td>F</td>\n",
       "      <td>street_2</td>\n",
       "      <td>192</td>\n",
       "      <td>73</td>\n",
       "      <td>32.5</td>\n",
       "      <td>B+</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1103</td>\n",
       "      <td>M</td>\n",
       "      <td>street_2</td>\n",
       "      <td>186</td>\n",
       "      <td>82</td>\n",
       "      <td>87.2</td>\n",
       "      <td>B+</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1104</td>\n",
       "      <td>F</td>\n",
       "      <td>street_2</td>\n",
       "      <td>167</td>\n",
       "      <td>81</td>\n",
       "      <td>80.4</td>\n",
       "      <td>B-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1105</td>\n",
       "      <td>F</td>\n",
       "      <td>street_4</td>\n",
       "      <td>159</td>\n",
       "      <td>64</td>\n",
       "      <td>84.8</td>\n",
       "      <td>B+</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  School Class    ID Gender   Address  Height  Weight  Math Physics\n",
       "0    S_1   C_1  1101      M  street_1     173      63  34.0      A+\n",
       "1    S_1   C_1  1102      F  street_2     192      73  32.5      B+\n",
       "2    S_1   C_1  1103      M  street_2     186      82  87.2      B+\n",
       "3    S_1   C_1  1104      F  street_2     167      81  80.4      B-\n",
       "4    S_1   C_1  1105      F  street_4     159      64  84.8      B+"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "df = pd.read_csv('data/table.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 一、append与assign\n",
    "### 1. append方法\n",
    "#### （a）利用序列添加行（必须指定name）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Gender  Height\n",
       "0      M     173\n",
       "1      F     192\n",
       "2      M     186\n",
       "3      F     167"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_append = df.loc[:3,['Gender','Height']].copy()\n",
    "df_append"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>new_row</th>\n",
       "      <td>F</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Gender  Height\n",
       "0            M     173\n",
       "1            F     192\n",
       "2            M     186\n",
       "3            F     167\n",
       "new_row      F     188"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series({'Gender':'F','Height':188},name='new_row')\n",
    "df_append.append(s)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### （b）用DataFrame添加表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>new_1</th>\n",
       "      <td>F</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>new_2</th>\n",
       "      <td>M</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Gender  Height\n",
       "0          M     173\n",
       "1          F     192\n",
       "2          M     186\n",
       "3          F     167\n",
       "new_1      F     188\n",
       "new_2      M     176"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp = pd.DataFrame({'Gender':['F','M'],'Height':[188,176]},index=['new_1','new_2'])\n",
    "df_append.append(df_temp)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. assign方法\n",
    "#### 该方法主要用于添加列，列名直接由参数指定："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "      <th>Letter</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>173</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>192</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>186</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>167</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Gender  Height Letter\n",
       "0      M     173      a\n",
       "1      F     192      b\n",
       "2      M     186      c\n",
       "3      F     167      d"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series(list('abcd'),index=range(4))\n",
    "df_append.assign(Letter=s)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 可以一次添加多个列："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "      <th>col1</th>\n",
       "      <th>col2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>173</td>\n",
       "      <td>MM</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>192</td>\n",
       "      <td>FF</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>186</td>\n",
       "      <td>MM</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>167</td>\n",
       "      <td>FF</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Gender  Height col1 col2\n",
       "0      M     173   MM    a\n",
       "1      F     192   FF    b\n",
       "2      M     186   MM    c\n",
       "3      F     167   FF    d"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_append.assign(col1=lambda x:x['Gender']*2,\n",
    "                 col2=s)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 二、combine与update\n",
    "### 1. comine方法\n",
    "#### comine和update都是用于表的填充函数，可以根据某种规则填充\n",
    "#### （a）填充对象\n",
    "#### 可以看出combine方法是按照表的顺序轮流进行逐列循环的，而且自动索引对齐，缺失值为NaN，理解这一点很重要"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0       M\n",
      "1       F\n",
      "10    NaN\n",
      "11    NaN\n",
      "Name: Gender, dtype: object 0     NaN\n",
      "1     NaN\n",
      "10      M\n",
      "11      F\n",
      "Name: Gender, dtype: object\n",
      "0     173.0\n",
      "1     192.0\n",
      "10      NaN\n",
      "11      NaN\n",
      "Name: Height, dtype: float64 0       NaN\n",
      "1       NaN\n",
      "10    161.0\n",
      "11    175.0\n",
      "Name: Height, dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Gender Height\n",
       "0     NaN    NaN\n",
       "1     NaN    NaN\n",
       "10    NaN    NaN\n",
       "11    NaN    NaN"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_combine_1 = df.loc[:1,['Gender','Height']].copy()\n",
    "df_combine_2 = df.loc[10:11,['Gender','Height']].copy()\n",
    "df_combine_1.combine(df_combine_2,lambda x,y:print(x,y))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### （b）一些例子\n",
    "#### 例①：根据列均值的大小填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  B\n",
       "0  8  6\n",
       "1  7  5"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 例子1\n",
    "df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})\n",
    "df2 = pd.DataFrame({'A': [8, 7], 'B': [6, 5]})\n",
    "df1.combine(df2,lambda x,y:x if x.mean()>y.mean() else y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 例②：索引对齐特性（默认状态下，后面的表没有的行列都会设置为NaN）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A    B    C\n",
       "0 NaN  NaN  NaN\n",
       "1 NaN  8.0  6.0\n",
       "2 NaN  7.0  5.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = pd.DataFrame({'B': [8, 7], 'C': [6, 5]},index=[1,2])\n",
    "df1.combine(df2,lambda x,y:x if x.mean()>y.mean() else y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 例③：使得df1原来符合条件的值不会被覆盖"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B    C\n",
       "0  1.0  NaN  NaN\n",
       "1  2.0  8.0  6.0\n",
       "2  NaN  7.0  5.0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.combine(df2,lambda x,y:x if x.mean()>y.mean() else y,overwrite=False) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 例④：在新增匹配df2的元素位置填充-1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B    C\n",
       "0  1.0 -1.0 -1.0\n",
       "1  2.0  8.0  6.0\n",
       "2 -1.0  7.0  5.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.combine(df2,lambda x,y:x if x.mean()>y.mean() else y,fill_value=-1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### （c）combine_first方法\n",
    "#### 这个方法作用是用df2填补df1的缺失值，功能比较简单，但很多时候会比combine更常用，下面举两个例子："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B\n",
       "0  1.0  3.0\n",
       "1  0.0  4.0"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]})\n",
    "df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})\n",
    "df1.combine_first(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B    C\n",
       "0  NaN  4.0  NaN\n",
       "1  0.0  3.0  1.0\n",
       "2  NaN  3.0  1.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = pd.DataFrame({'A': [None, 0], 'B': [4, None]})\n",
    "df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2])\n",
    "df1.combine_first(df2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. update方法\n",
    "#### （a）三个特点\n",
    "#### ①返回的框索引只会与被调用框的一致（默认使用左连接，下一节会介绍）\n",
    "#### ②第二个框中的nan元素不会起作用\n",
    "#### ③没有返回值，直接在df上操作\n",
    "#### （b）例子\n",
    "#### 例①：索引完全对齐情况下的操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  B\n",
       "0  1  4\n",
       "1  2  5\n",
       "2  3  6"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = pd.DataFrame({'A': [1, 2, 3],\n",
    "                    'B': [400, 500, 600]})\n",
    "df2 = pd.DataFrame({'B': [4, 5, 6],\n",
    "                    'C': [7, 8, 9]})\n",
    "df1.update(df2)\n",
    "df1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 例②：部分填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c</td>\n",
       "      <td>e</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  B\n",
       "0  a  x\n",
       "1  b  d\n",
       "2  c  e"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = pd.DataFrame({'A': ['a', 'b', 'c'],\n",
    "                    'B': ['x', 'y', 'z']})\n",
    "df2 = pd.DataFrame({'B': ['d', 'e']}, index=[1,2])\n",
    "df1.update(df2)\n",
    "df1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 例③：缺失值不会填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>500.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A      B\n",
       "0  1    4.0\n",
       "1  2  500.0\n",
       "2  3    6.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = pd.DataFrame({'A': [1, 2, 3],\n",
    "                    'B': [400, 500, 600]})\n",
    "df2 = pd.DataFrame({'B': [4, np.nan, 6]})\n",
    "df1.update(df2)\n",
    "df1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 三、concat方法\n",
    "#### concat方法可以在两个维度上拼接，默认纵向凭借（axis=0），拼接方式默认外连接\n",
    "#### 所谓外连接，就是取拼接方向的并集，而'inner'时取拼接方向（若使用默认的纵向拼接，则为列的交集）的交集\n",
    "#### 下面举一些例子说明其参数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame({'A': ['A0', 'A1'],\n",
    "                    'B': ['B0', 'B1']},\n",
    "                    index = [0,1])\n",
    "df2 = pd.DataFrame({'A': ['A2', 'A3'],\n",
    "                    'B': ['B2', 'B3']},\n",
    "                    index = [2,3])\n",
    "df3 = pd.DataFrame({'A': ['A1', 'A3'],\n",
    "                    'D': ['D1', 'D3'],\n",
    "                    'E': ['E1', 'E3']},\n",
    "                    index = [1,3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 默认状态拼接："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B\n",
       "0  A0  B0\n",
       "1  A1  B1\n",
       "2  A2  B2\n",
       "3  A3  B3"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### axis=1时沿列方向拼接："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B    A    B\n",
       "0   A0   B0  NaN  NaN\n",
       "1   A1   B1  NaN  NaN\n",
       "2  NaN  NaN   A2   B2\n",
       "3  NaN  NaN   A3   B3"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2],axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### join设置为内连接（由于axis=0，因此列取交集）："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A\n",
       "1  A1\n",
       "3  A3\n",
       "0  A0\n",
       "1  A1"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df3,df1],join='inner')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### join设置为外链接："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>D1</td>\n",
       "      <td>E1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>D3</td>\n",
       "      <td>E3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A    B    D    E\n",
       "1  A1  NaN   D1   E1\n",
       "3  A3  NaN   D3   E3\n",
       "0  A0   B0  NaN  NaN\n",
       "1  A1   B1  NaN  NaN"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df3,df1],join='outer',sort=True) #sort设置列排序，默认为False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### verify_integrity检查列是否唯一："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "#pd.concat([df3,df1],verify_integrity=True,sort=True) 报错"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 同样，可以添加Series："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>X</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>X0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>X1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   X\n",
       "0  A0  B0  X0\n",
       "1  A1  B1  X1"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series(['X0', 'X1'], name='X')\n",
    "pd.concat([df1,s],axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### key参数用于对不同的数据框增加一个标号，便于索引："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultiIndex([('x', 0),\n",
       "            ('x', 1),\n",
       "            ('y', 2),\n",
       "            ('y', 3)],\n",
       "           )"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2], keys=['x', 'y'])\n",
    "pd.concat([df1,df2], keys=['x', 'y']).index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 四、merge与join\n",
    "### 1. merge函数\n",
    "#### merge函数的作用是将两个pandas对象横向合并，遇到重复的索引项时会使用笛卡尔积，默认inner连接，可选left、outer、right连接\n",
    "#### 所谓左连接，就是指以第一个表索引为基准，右边的表中如果不再左边的则不加入，如果在左边的就以笛卡尔积的方式加入\n",
    "#### merge/join与concat的不同之处在于on参数，可以指定某一个对象为key来进行连接\n",
    "#### 同样的，下面举一些例子："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],\n",
    "                     'key2': ['K0', 'K1', 'K0', 'K1'],\n",
    "                      'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                      'B': ['B0', 'B1', 'B2', 'B3']}) \n",
    "right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],\n",
    "                      'key2': ['K0', 'K0', 'K0', 'K0'],\n",
    "                      'C': ['C0', 'C1', 'C2', 'C3'],\n",
    "                      'D': ['D0', 'D1', 'D2', 'D3']})\n",
    "right2 = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],\n",
    "                      'key2': ['K0', 'K0', 'K0', 'K0'],\n",
    "                      'C': ['C0', 'C1', 'C2', 'C3']})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 以key1为准则连接，如果具有相同的列，则默认suffixes=('_x','_y')："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2_x</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key2_y</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K0</td>\n",
       "      <td>K1</td>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K0</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K0</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>K2</td>\n",
       "      <td>K1</td>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>K0</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2_x   A   B key2_y   C   D\n",
       "0   K0     K0  A0  B0     K0  C0  D0\n",
       "1   K0     K1  A1  B1     K0  C0  D0\n",
       "2   K1     K0  A2  B2     K0  C1  D1\n",
       "3   K1     K0  A2  B2     K0  C2  D2\n",
       "4   K2     K1  A3  B3     K0  C3  D3"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, on='key1')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 以多组键连接："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2   A   B   C   D\n",
       "0   K0   K0  A0  B0  C0  D0\n",
       "1   K1   K0  A2  B2  C1  D1\n",
       "2   K1   K0  A2  B2  C2  D2"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, on=['key1','key2'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 默认使用inner连接，因为merge只能横向拼接，所以取行向上keys的交集，下面看如果使用how=outer参数\n",
    "#### 注意：这里的how就是concat的join"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K0</td>\n",
       "      <td>K1</td>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>K2</td>\n",
       "      <td>K1</td>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>K2</td>\n",
       "      <td>K0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2    A    B    C    D\n",
       "0   K0   K0   A0   B0   C0   D0\n",
       "1   K0   K1   A1   B1  NaN  NaN\n",
       "2   K1   K0   A2   B2   C1   D1\n",
       "3   K1   K0   A2   B2   C2   D2\n",
       "4   K2   K1   A3   B3  NaN  NaN\n",
       "5   K2   K0  NaN  NaN   C3   D3"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, how='outer', on=['key1','key2'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 左连接："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K0</td>\n",
       "      <td>K1</td>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>K2</td>\n",
       "      <td>K1</td>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2   A   B    C    D\n",
       "0   K0   K0  A0  B0   C0   D0\n",
       "1   K0   K1  A1  B1  NaN  NaN\n",
       "2   K1   K0  A2  B2   C1   D1\n",
       "3   K1   K0  A2  B2   C2   D2\n",
       "4   K2   K1  A3  B3  NaN  NaN"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, how='left', on=['key1', 'key2'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 右连接："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K2</td>\n",
       "      <td>K0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2    A    B   C   D\n",
       "0   K0   K0   A0   B0  C0  D0\n",
       "1   K1   K0   A2   B2  C1  D1\n",
       "2   K1   K0   A2   B2  C2  D2\n",
       "3   K2   K0  NaN  NaN  C3  D3"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, how='right', on=['key1', 'key2'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 如果还是对笛卡尔积不太了解，请务必理解下面这个例子，由于B的所有元素为2，因此需要6行："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A_x</th>\n",
       "      <th>B</th>\n",
       "      <th>A_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A_x  B  A_y\n",
       "0    1  2    4\n",
       "1    1  2    5\n",
       "2    1  2    6\n",
       "3    2  2    4\n",
       "4    2  2    5\n",
       "5    2  2    6"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left = pd.DataFrame({'A': [1, 2], 'B': [2, 2]})\n",
    "right = pd.DataFrame({'A': [4, 5, 6], 'B': [2, 2, 2]})\n",
    "pd.merge(left, right, on='B', how='outer')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### validate检验的是到底哪一边出现了重复索引，如果是“one_to_one”则两侧索引都是唯一，如果\"one_to_many\"则左侧唯一"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "left = pd.DataFrame({'A': [1, 2], 'B': [2, 2]})\n",
    "right = pd.DataFrame({'A': [4, 5, 6], 'B': [2, 3, 4]})\n",
    "#pd.merge(left, right, on='B', how='outer',validate='one_to_one') #报错"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A_x</th>\n",
       "      <th>B</th>\n",
       "      <th>A_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A_x  B  A_y\n",
       "0  1.0  2  4.0\n",
       "1  2.0  1  NaN\n",
       "2  NaN  3  5.0\n",
       "3  NaN  4  6.0"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left = pd.DataFrame({'A': [1, 2], 'B': [2, 1]})\n",
    "pd.merge(left, right, on='B', how='outer',validate='one_to_one')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### indicator参数指示了，合并后该行索引的来源"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col1</th>\n",
       "      <th>col_left</th>\n",
       "      <th>col_right</th>\n",
       "      <th>_merge</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>a</td>\n",
       "      <td>NaN</td>\n",
       "      <td>left_only</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>b</td>\n",
       "      <td>2.0</td>\n",
       "      <td>both</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>right_only</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>right_only</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col1 col_left  col_right      _merge\n",
       "0     0        a        NaN   left_only\n",
       "1     1        b        2.0        both\n",
       "2     2      NaN        2.0  right_only\n",
       "3     2      NaN        2.0  right_only"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})\n",
    "df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})\n",
    "pd.merge(df1, df2, on='col1', how='outer', indicator=True) #indicator='indicator_column'也是可以的"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. join函数\n",
    "#### join函数作用是将多个pandas对象横向拼接，遇到重复的索引项时会使用笛卡尔积，默认左连接，可选inner、outer、right连接"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>K0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A   B    C    D\n",
       "K0  A0  B0   C0   D0\n",
       "K1  A1  B1  NaN  NaN\n",
       "K2  A2  B2   C2   D2"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n",
    "                     'B': ['B0', 'B1', 'B2']},\n",
    "                    index=['K0', 'K1', 'K2'])\n",
    "right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],\n",
    "                      'D': ['D0', 'D2', 'D3']},\n",
    "                    index=['K0', 'K2', 'K3'])\n",
    "left.join(right)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 对于many_to_one模式下的合并，往往join更为方便\n",
    "#### 同样可以指定key："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>K1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B key   C   D\n",
       "0  A0  B0  K0  C0  D0\n",
       "1  A1  B1  K1  C1  D1\n",
       "2  A2  B2  K0  C0  D0\n",
       "3  A3  B3  K1  C1  D1"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                     'B': ['B0', 'B1', 'B2', 'B3'],\n",
    "                     'key': ['K0', 'K1', 'K0', 'K1']})\n",
    "right = pd.DataFrame({'C': ['C0', 'C1'],\n",
    "                      'D': ['D0', 'D1']},\n",
    "                     index=['K0', 'K1'])\n",
    "left.join(right, on='key')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 多层key："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K0</td>\n",
       "      <td>K1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>K2</td>\n",
       "      <td>K1</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B key1 key2    C    D\n",
       "0  A0  B0   K0   K0   C0   D0\n",
       "1  A1  B1   K0   K1  NaN  NaN\n",
       "2  A2  B2   K1   K0   C1   D1\n",
       "3  A3  B3   K2   K1   C3   D3"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                     'B': ['B0', 'B1', 'B2', 'B3'],\n",
    "                     'key1': ['K0', 'K0', 'K1', 'K2'],\n",
    "                     'key2': ['K0', 'K1', 'K0', 'K1']})\n",
    "index = pd.MultiIndex.from_tuples([('K0', 'K0'), ('K1', 'K0'),\n",
    "                                   ('K2', 'K0'), ('K2', 'K1')],names=['key1','key2'])\n",
    "right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],\n",
    "                      'D': ['D0', 'D1', 'D2', 'D3']},\n",
    "                     index=index)\n",
    "left.join(right, on=['key1','key2'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 五、问题与练习"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. 问题\n",
    "#### 【问题一】 请思考什么是append/assign/combine/update/concat/merge/join各自最适合使用的场景，并举出相应的例子。\n",
    "#### 【问题二】 merge_ordered和merge_asof的作用是什么？和merge是什么关系？\n",
    "#### 【问题三】 请构造一个多级索引与多级索引合并的例子，尝试使用不同的合并函数。\n",
    "#### 【问题四】 上文提到了连接的笛卡尔积，那么当连接方式变化时（inner/outer/left/right），这种笛卡尔积规则会相应变化吗？请构造相应例子。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. 练习\n",
    "#### 【练习一】有2张公司的员工信息表，每个公司共有16名员工，共有五个公司，请解决如下问题："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Company</th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "      <th>Height</th>\n",
       "      <th>Weight</th>\n",
       "      <th>Salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>a1</td>\n",
       "      <td>47</td>\n",
       "      <td>188</td>\n",
       "      <td>63.7</td>\n",
       "      <td>25819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A</td>\n",
       "      <td>a3</td>\n",
       "      <td>39</td>\n",
       "      <td>172</td>\n",
       "      <td>55.9</td>\n",
       "      <td>21983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A</td>\n",
       "      <td>a4</td>\n",
       "      <td>43</td>\n",
       "      <td>158</td>\n",
       "      <td>62.5</td>\n",
       "      <td>21755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A</td>\n",
       "      <td>a6</td>\n",
       "      <td>42</td>\n",
       "      <td>182</td>\n",
       "      <td>76.9</td>\n",
       "      <td>17354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A</td>\n",
       "      <td>a7</td>\n",
       "      <td>49</td>\n",
       "      <td>171</td>\n",
       "      <td>94.6</td>\n",
       "      <td>6177</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Company Name  Age  Height  Weight  Salary\n",
       "0       A   a1   47     188    63.7   25819\n",
       "1       A   a3   39     172    55.9   21983\n",
       "2       A   a4   43     158    62.5   21755\n",
       "3       A   a6   42     182    76.9   17354\n",
       "4       A   a7   49     171    94.6    6177"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('data/Employee1.csv').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Company</th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "      <th>Height</th>\n",
       "      <th>Weight</th>\n",
       "      <th>Salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>a1</td>\n",
       "      <td>30</td>\n",
       "      <td>156</td>\n",
       "      <td>91.2</td>\n",
       "      <td>28133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A</td>\n",
       "      <td>a2</td>\n",
       "      <td>50</td>\n",
       "      <td>190</td>\n",
       "      <td>83.4</td>\n",
       "      <td>6673</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A</td>\n",
       "      <td>a3</td>\n",
       "      <td>34</td>\n",
       "      <td>168</td>\n",
       "      <td>96.6</td>\n",
       "      <td>16503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A</td>\n",
       "      <td>a5</td>\n",
       "      <td>51</td>\n",
       "      <td>176</td>\n",
       "      <td>97.2</td>\n",
       "      <td>23294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A</td>\n",
       "      <td>a6</td>\n",
       "      <td>37</td>\n",
       "      <td>183</td>\n",
       "      <td>93.2</td>\n",
       "      <td>19256</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Company Name  Age  Height  Weight  Salary\n",
       "0       A   a1   30     156    91.2   28133\n",
       "1       A   a2   50     190    83.4    6673\n",
       "2       A   a3   34     168    96.6   16503\n",
       "3       A   a5   51     176    97.2   23294\n",
       "4       A   a6   37     183    93.2   19256"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('data/Employee2.csv').head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### (a) 每个公司有多少员工满足如下条件：既出现第一张表，又出现在第二张表。\n",
    "#### (b) 将所有不符合(a)中条件的行筛选出来，合并为一张新表，列名与原表一致。\n",
    "#### (c) 现在需要编制所有80位员工的信息表，对于(b)中的员工要求不变，对于满足(a)条件员工，它们在某个指标的数值，取偏离它所属公司中满足(b)员工的均值数较小的哪一个，例如：P公司在两张表的交集为{p1}，并集扣除交集为{p2,p3,p4}，那么如果后者集合的工资均值为1万元，且p1在表1的工资为13000元，在表2的工资为9000元，那么应该最后取9000元作为p1的工资，最后对于没有信息的员工，利用缺失值填充。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 【练习二】有2张课程的分数表（分数随机生成），但专业课（学科基础课、专业必修课、专业选修课）与其他课程混在一起，请解决如下问题："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>课程名字</th>\n",
       "      <th>课程类别</th>\n",
       "      <th>学分</th>\n",
       "      <th>分数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>思想道德修养与法律基础</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>89.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>云计算应用与开发</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>96.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>社会计算</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>78.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>深度学习</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>75.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>人工智能导论</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>3</td>\n",
       "      <td>84.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          课程名字   课程类别  学分    分数\n",
       "0  思想道德修养与法律基础    思政类   3  89.0\n",
       "1     云计算应用与开发  专业选修课   3  96.0\n",
       "2         社会计算  专业选修课   3  78.0\n",
       "3         深度学习  专业选修课   3  75.0\n",
       "4       人工智能导论  专业必修课   3  84.0"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('data/Course1.csv').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>课程名字</th>\n",
       "      <th>课程类别</th>\n",
       "      <th>学分</th>\n",
       "      <th>分数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>高等数学（一）</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>4</td>\n",
       "      <td>99.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>数据科学与工程导论</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>专业英语</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>概率论</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>3</td>\n",
       "      <td>99.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>计算机系统</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>80.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        课程名字   课程类别  学分     分数\n",
       "0    高等数学（一）  学科基础课   4   99.0\n",
       "1  数据科学与工程导论  学科基础课   3    NaN\n",
       "2       专业英语  学科基础课   2  100.0\n",
       "3        概率论  学科基础课   3   99.0\n",
       "4      计算机系统  专业必修课   4   80.0"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('data/Course2.csv').head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### (a) 将两张表分别拆分为专业课与非专业课（结果为四张表）。\n",
    "#### (b) 将两张专业课的分数表和两张非专业课的分数表分别合并。\n",
    "#### (c) 不使用(a)中的步骤，请直接读取两张表合并后拆分。\n",
    "#### (d) 专业课程中有缺失值吗，如果有的话请在完成(3)的同时，用组内（3种类型的专业课）均值填充缺失值后拆分。"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
